#!/usr/bin/perl -w
use strict;
use warnings;

use Getopt::Std;

my (%opt);
getopts('haf:p:r:c:s:',\%opt);
die &usage() if (tutej_any($opt{m}));
&usage() if $opt{h};

#Input file format:

my $pdb=$opt{f};
my $chid=$opt{p};

#read list of rare variants
my @rare=open_file($opt{r});
#read list of common variants
my @common=open_file($opt{c});

#parse PDB swissprot mapping
my @sifts=open_file($opt{s});
my (%pdb2uni);  
for (my $i=0;$i<=$#sifts;$i++){
  my @lin=split(/\t/,$sifts[$i]);
  if($lin[2] eq $opt{a}){
    my $pdbch=$lin[0].$lin[1];
    if(exists $pdb2uni{$pdbch}){$pdb2uni{$pdbch}=$pdb2uni{$pdbch}."-".$i;}
    else{$pdb2uni{$pdbch}=$i;}
  }
}

#find pdbID for query uniAC
my $mypdbch = $pdb.$chid;
#get mapping 
my(@frags,@frage,%shift);
my @all=split(/-/,$pdb2uni{$mypdbch});
print "$pdb2uni{$mypdbch}\n";
foreach my $maped(@all){
  my @mapping=split(/\t/,$sifts[$maped]);
  my $UPseq_s=$mapping[7];
  my $PDBseq_s=$mapping[5];
  push (@frags,$mapping[7]);
  push (@frage,$mapping[8]);
  #print "$UPseq_s $PDBseq_s\n";
  $shift{$mapping[7]}=$PDBseq_s-$UPseq_s;
#print "$sifts[$maped]\n";
  print "$mapping[2] PDB seq start: $mapping[5] - Uniprot seq start: $mapping[7] = $shift{$mapping[7]}\n";
}

my ($resr,$resc);
foreach my $lin(@rare){
  my $jest = 0;
  my @aa=split(/\t/,$lin);
  for (my $k=0;$k<=$#frags;$k++){
    my $unis=$frags[$k];
    my $unie=$frage[$k];
    if($aa[1] >= $unis and $aa[1] <= $unie){
      $jest=1;
      my $pos=$aa[1]+$shift{$unis};
      if ($resr){$resr=$resr."+".$pos unless $pos < 1;}
      else{$resr=$pos unless $pos < 1;}
    }
  }
  if($jest == 0){print "Rare variant residue $aa[1] not present in PDB: fragments of structure not resolved\n";}
}
foreach my $lin(@common){
  my $jest = 0;
  my @aa=split(/\t/,$lin);
  for (my $k=0;$k<=$#frags;$k++){
    my $unis=$frags[$k];
    my $unie=$frage[$k];
    if($aa[1] > $unis and $aa[1] <= $unie){
      $jest=1;
      my $pos=$aa[1]+$shift{$unis};
      if ($resc){$resc=$resc."+".$pos unless $pos < 1;}
      else{$resc=$pos unless $pos < 1;}
    }
  }
  if($jest == 0){print "Common variant residue $aa[1] not present in PDB: fragments of structure not resolved\n";}
}
unless ($resc or $resr){print "none of SNVs present in pdb structure\n";}
#prepare pymol script
open(PML, ">displaySNVs.pml") or die "Can not open an output file: $!";
#print PML "log_open mapListofResidues.log\n";
#print PML "load pdb$pdb.ent\n";
#	define new colors
print PML "set_color dblue, [30,144,255]\n";#dodger blue
print PML "set_color fgreen, [34,139,34]\n";#forest green
print PML "set_color fire, [178,34,34]\n";#firebrick
print PML "set_color orange, [255,165,0]\n";#orange
print PML "set_color orchid, [153,50,204]\n";#dark orchid
print PML "set_color vred, [208,32,144]\n";#violet red
#fetch biological assembly
print PML "fetch $pdb, type=pdb1, async=0\n";
print PML "split_state $pdb\n";
#modify general display
print PML "show cartoon\n";
print PML "hide lines\n";
print PML "remove solvent\n";
print PML "util.cbc\n";
print PML "show spheres, organic\n";
print PML "util.cbag organic\n";
#display rare and common variants
print PML "select rare,resid $resr\n" if ($resr);
print PML "select common,resid $resc\n" if ($resc);;
#	group 1
print PML "color orchid, rare\n";
print PML "show sticks, rare\n";
#	group 2
print PML "color orange, common\n";
print PML "show sticks, common\n";


sub open_file{
        my ($file_name)=@_;
        open(INP1, "< $file_name") or die "Can not open an input file: $!";
        my @file1=<INP1>;
        close (INP1);
        chomp @file1;
        return @file1;
}

sub usage(){
print STDERR << "EOF";
Usage: snv2pdb.pl -f [pdb file] [arguments]

 -p	: protein chain ID
 -u	: uniprotAC
 -r	: rare SNV 
 -c	: common SNV
 -s 	: SIFTS mapping file
 -h	: help message

EOF
exit;
}
sub tutej_any { ( grep $_, @_ ) < 6 }

